
> library(survival)

> library(parallel)

> library(data.table)

> load("data/analysis_data.RData")

> source("R/00-functions.R")

> RNGkind("L'Ecuyer-CMRG")

> set.seed(174071644)

> # main dataset ----
> data_main <- respondent_candidate_data[main == 1]

> n_main <- data_main[, length(unique(subject_id))]

> data_main[, subject_id := rep(1:n_main, each = 7)]

> model_main <- coxph(
+   Surv(8-ranking, status) ~
+     preferences * electability_last +
+     electability * electability_last +
+     expected_utility * electability_last +
+     strata(subject_id) - electability_last,
+   data = data_main,
+   id = subject_id)

> ids <- sort(unique(data_main$subject_id))

> data_list_main <- lapply(ids, function(x) {
+   data_main[subject_id == x]
+ })

> n <- length(data_list_main)

> boot <- function(i) {
+   boot_i <- sample(n, replace = TRUE)
+   data <- rbindlist(lapply(1:n, function(i) {
+     out <- copy(data_list_main[[boot_i[i]]])
+     out[, subject_id := i]
+     out
+   }))
+   coxph(
+     Surv(8-ranking, status) ~
+       preferences * electability_last +
+       electability * electability_last +
+       expected_utility * electability_last +
+       strata(subject_id) - electability_last,
+     data = data,
+     id = subject_id)
+ }

> boots_main <- mclapply(1:1000, boot, mc.cores = 6)

> coefs_main <- rbindlist(lapply(boots_main,
+   function(x) as.data.table(rbind(coef(x)))))

> coefs_main <- coefs_main[, .(
+   variable = c("Preferences", "Electability", "Expected Utility",
+     "Preferences $\\times$ Electability Last",
+     "Electability $\\times$ Electability Last",
+     "Expected Utility $\\times$ Electability Last"),
+   est = c(
+     mean(`preferences`),
+     mean(`electability`),
+     mean(`expected_utility`),
+     mean(`preferences:electability_last`),
+     mean(`electability:electability_last`),
+     mean(`expected_utility:electability_last`)),
+   q025 = c(
+     quantile(`preferences`, .025),
+     quantile(`electability`, .025),
+     quantile(`expected_utility`, .025),
+     quantile(`preferences:electability_last`, .025),
+     quantile(`electability:electability_last`, .025),
+     quantile(`expected_utility:electability_last`, .025)),
+   q975 = c(
+     quantile(`preferences`, .975),
+     quantile(`electability`, .975),
+     quantile(`expected_utility`, .975),
+     quantile(`preferences:electability_last`, .975),
+     quantile(`electability:electability_last`, .975),
+     quantile(`expected_utility:electability_last`, .975))
+ )]

> for (i in 1:nrow(coefs_main)) {
+   coefs_main[i, cat(
+     variable,
+     "&$", sprintf("%.03f", est), "$",
+     "&$[", sprintf("%.03f", q025), ",", sprintf("%.03f", q975), "]$",
+     "\\\\\n")]
+ }
Preferences &$ -0.052 $ &$[ -0.065 , -0.041 ]$ \\
Electability &$ -0.001 $ &$[ -0.009 , 0.006 ]$ \\
Expected Utility &$ -0.044 $ &$[ -0.057 , -0.030 ]$ \\
Preferences $\times$ Electability Last &$ 0.015 $ &$[ -0.000 , 0.030 ]$ \\
Electability $\times$ Electability Last &$ -0.023 $ &$[ -0.034 , -0.011 ]$ \\
Expected Utility $\times$ Electability Last &$ 0.010 $ &$[ -0.010 , 0.028 ]$ \\

> # lp <- predict(model_main)
> # all_rankings <- rbind(1:7, permute::allPerms(7))
> # calculate_expected_rankings <- function(linear_predictor) {
> #   do.call(c, mclapply(1:n_main, function(i) {
> #     scores <- lp[((i - 1) * 7 + 1):(i * 7)]
> #     probs <- matrix(NA, nrow = 5040, ncol = 7)
> #     fill_probs_column_k <- function(k) {
> #       if (k == 1) {
> #         p1 <- exp(scores) / sum(exp(scores))
> #         p1[all_rankings[, 1]]
> #       } else {
> #         sapply(1:5040, function(row) {
> #           remaining_candidates <- setdiff(1:7, all_rankings[row, 1:(k-1)])
> #           exp_scores <- exp(scores[remaining_candidates])
> #           pk <- exp_scores / sum(exp_scores)
> #           pk[match(all_rankings[row, k], remaining_candidates)]
> #         })
> #       }
> #     }
> #     for (k in 1:7) {
> #       probs[, k] <- fill_probs_column_k(k)
> #     }
> #     probs <- apply(probs, 1, prod)
> #     expected_ranking <- sapply(1:7, function(candidate) {
> #       sum(sapply(1:5040, function(row) {
> #         candidate_rank <- which(all_rankings[row, ] == candidate)
> #         probs[row] * candidate_rank
> #       }))
> #     })
> #     expected_ranking
> #   }, mc.cores = 6))
> # }
> # expected_rankings <- calculate_expected_rankings(lp)
> # difference in average linear predictor for women/men by elicitation order
> 
> # full dataset ----
> data_full <- respondent_candidate_data

> data_full[, subject_id := rep(1:1211, each = 7)]

> model_full <- coxph(
+   Surv(8-ranking, status) ~
+     preferences * electability_last +
+     electability * electability_last +
+     expected_utility * electability_last +
+     strata(subject_id) - electability_last,
+   data = data_full,
+   id = subject_id)

> ids <- sort(unique(data_full$subject_id))

> data_list_full <- lapply(ids, function(x) {
+   data_full[subject_id == x]
+ })

> n <- length(data_list_full)

> boot <- function(i) {
+   boot_i <- sample(n, replace = TRUE)
+   data <- rbindlist(lapply(1:n, function(i) {
+     out <- copy(data_list_full[[boot_i[i]]])
+     out[, subject_id := i]
+     out
+   }))
+   coxph(
+     Surv(8-ranking, status) ~
+       preferences * electability_last +
+       electability * electability_last +
+       expected_utility * electability_last +
+       strata(subject_id) - electability_last,
+     data = data,
+     id = subject_id)
+ }

> boots_full <- mclapply(1:1000, boot, mc.cores = 6)

> coefs_full <- rbindlist(lapply(boots_full,
+   function(x) as.data.table(rbind(coef(x)))))

> coefs_full[, .(
+   type = c("preferences", "electability", "expected utility"),
+   est = c(
+     mean(`preferences:electability_last`),
+     mean(`electability:electability_last`),
+     mean(`expected_utility:electability_last`)),
+   q025 = c(
+     quantile(`preferences:electability_last`, .025),
+     quantile(`electability:electability_last`, .025),
+     quantile(`expected_utility:electability_last`, .025)),
+   q975 = c(
+     quantile(`preferences:electability_last`, .975),
+     quantile(`electability:electability_last`, .975),
+     quantile(`expected_utility:electability_last`, .975))
+ )]
               type          est         q025         q975
             <char>        <num>        <num>        <num>
1:      preferences  0.017109054  0.007423259  0.027891637
2:     electability -0.013078081 -0.020539349 -0.005399446
3: expected utility  0.005008311 -0.006800750  0.017442393

> # difference in average linear predictor for women/men by elicitation order
> lps_full <- do.call(cbind, lapply(boots_full, function(x) {
+   predict(x, newdata = data_full)
+ }))

> women_full <- rep(c(F, T, F, F, F, T, F), 1211)

> quantile(colMeans(lps_full[women_full, ]), c(.025, .975))
     2.5%     97.5% 
0.2161959 0.3763089 

> quantile(colMeans(lps_full[!women_full, ]), c(.025, .975))
       2.5%       97.5% 
-0.19452757 -0.04053927 

> # non-reversed rankings ----
> # main dataset ----
> data_main_rev <- respondent_candidate_data[main == 1]

> data_main_rev[, subject_id := rep(1:833, each = 7)]

> model_main_rev <- coxph(
+   Surv(ranking, status) ~
+     preferences * electability_last +
+     electability * electability_last +
+     expected_utility * electability_last +
+     strata(subject_id) - electability_last,
+   data = data_main_rev,
+   id = subject_id)

> ids <- sort(unique(data_main_rev$subject_id))

> data_list_main_rev <- lapply(ids, function(x) {
+   data_main_rev[subject_id == x]
+ })

> n <- length(data_list_main_rev)

> boot <- function(i) {
+   boot_i <- sample(n, replace = TRUE)
+   data <- rbindlist(lapply(1:n, function(i) {
+     out <- copy(data_list_main_rev[[boot_i[i]]])
+     out[, subject_id := i]
+     out
+   }))
+   coxph(
+     Surv(ranking, status) ~
+       preferences * electability_last +
+       electability * electability_last +
+       expected_utility * electability_last +
+       strata(subject_id) - electability_last,
+     data = data,
+     id = subject_id)
+ }

> boots_main_rev <- mclapply(1:1000, boot, mc.cores = 6)

> coefs_main_rev <- rbindlist(lapply(boots_main_rev,
+   function(x) as.data.table(rbind(coef(x)))))

> coefs_main_rev <- coefs_main_rev[, .(
+   variable = c("Preferences", "Electability", "Expected Utility",
+     "Preferences $\\times$ Electability Last",
+     "Electability $\\times$ Electability Last",
+     "Expected Utility $\\times$ Electability Last"),
+   est = c(
+     mean(`preferences`),
+     mean(`electability`),
+     mean(`expected_utility`),
+     mean(`preferences:electability_last`),
+     mean(`electability:electability_last`),
+     mean(`expected_utility:electability_last`)),
+   q025 = c(
+     quantile(`preferences`, .025),
+     quantile(`electability`, .025),
+     quantile(`expected_utility`, .025),
+     quantile(`preferences:electability_last`, .025),
+     quantile(`electability:electability_last`, .025),
+     quantile(`expected_utility:electability_last`, .025)),
+   q975 = c(
+     quantile(`preferences`, .975),
+     quantile(`electability`, .975),
+     quantile(`expected_utility`, .975),
+     quantile(`preferences:electability_last`, .975),
+     quantile(`electability:electability_last`, .975),
+     quantile(`expected_utility:electability_last`, .975))
+ )]

> # difference in average linear predictor for women/men by elicitation order
> lps_main_rev <- do.call(cbind, lapply(boots_main_rev, function(x) {
+   predict(x, newdata = data_main_rev)
+ }))

> women_main_rev <- rep(c(F, T, F, F, F, T, F), 833)

> quantile(colMeans(lps_main_rev[women_main_rev, ]), c(.025, .975))
      2.5%      97.5% 
-0.4331679 -0.2181654 

> quantile(colMeans(lps_main_rev[!women_main_rev, ]), c(.025, .975))
      2.5%      97.5% 
0.03376202 0.23272063 

> # full dataset ----
> data_full_rev <- respondent_candidate_data

> data_full_rev[, subject_id := rep(1:1211, each = 7)]

> model_full_rev <- coxph(
+   Surv(ranking, status) ~
+     preferences * electability_last +
+     electability * electability_last +
+     expected_utility * electability_last +
+     strata(subject_id) - electability_last,
+   data = data_full_rev,
+   id = subject_id)

> ids <- sort(unique(data_full_rev$subject_id))

> data_list_full_rev <- lapply(ids, function(x) {
+   data_full_rev[subject_id == x]
+ })

> n <- length(data_list_full_rev)

> boot <- function(i) {
+   boot_i <- sample(n, replace = TRUE)
+   data <- rbindlist(lapply(1:n, function(i) {
+     out <- copy(data_list_full_rev[[boot_i[i]]])
+     out[, subject_id := i]
+     out
+   }))
+   coxph(
+     Surv(8-ranking, status) ~
+       preferences * electability_last +
+       electability * electability_last +
+       expected_utility * electability_last +
+       strata(subject_id) - electability_last,
+     data = data,
+     id = subject_id)
+ }

> boots_full_rev <- mclapply(1:1000, boot, mc.cores = 6)

> coefs_full_rev <- rbindlist(lapply(boots_full_rev,
+   function(x) as.data.table(rbind(coef(x)))))

> coefs_full_rev[, .(
+   type = c("preferences", "electability", "expected utility"),
+   est = c(
+     mean(`preferences:electability_last`),
+     mean(`electability:electability_last`),
+     mean(`expected_utility:electability_last`)),
+   q025 = c(
+     quantile(`preferences:electability_last`, .025),
+     quantile(`electability:electability_last`, .025),
+     quantile(`expected_utility:electability_last`, .025)),
+   q975 = c(
+     quantile(`preferences:electability_last`, .975),
+     quantile(`electability:electability_last`, .975),
+     quantile(`expected_utility:electability_last`, .975))
+ )]
               type          est         q025         q975
             <char>        <num>        <num>        <num>
1:      preferences  0.017109054  0.007423259  0.027891637
2:     electability -0.013078081 -0.020539349 -0.005399446
3: expected utility  0.005008311 -0.006800750  0.017442393

> # difference in average linear predictor for women/men by elicitation order
> lps_full_rev <- do.call(cbind, lapply(boots_full_rev, function(x) {
+   predict(x, newdata = data_full_rev)
+ }))

> women_full_rev <- rep(c(F, T, F, F, F, T, F), 1211)

> quantile(colMeans(lps_full_rev[women_full_rev, ]), c(.025, .975))
     2.5%     97.5% 
0.2161959 0.3763089 

> quantile(colMeans(lps_full_rev[!women_full_rev, ]), c(.025, .975))
       2.5%       97.5% 
-0.19452757 -0.04053927 

> # comparison ----
> AIC(model_main)
[1] 8861.377

> AIC(model_main_rev)
[1] 9138.734

> AIC(model_full)
[1] 14308.95

> AIC(model_full_rev)
[1] 14713.96
